* WINDOWS PREAMBLE. 
* THE THREE GLOBALS BELOW ARE PLACEHOLDERS AND MUST BE EDITED BEFORE RUNNING.
#delimit ;
* FROM HERE ON EVERY STATEMENT AND EVERY COMMENT ENDS WITH A SEMICOLON ;
* rawDir  - FOLDER THE COMPRESSED EXTRACT AND ITS DO FILE ARE COPIED FROM ;
global rawDir "LOCATION OF RAW CPS DATA";
* dataDir - FOLDER ALL INTERMEDIATE AND FINAL DATASETS ARE WRITTEN TO ;
global dataDir "WORKING DIRECTORY";
* currDir - FOLDER THE LOG FILE IS OPENED IN ;
global currDir "PROGRAM DIRECTORY";
cd "$currDir" ;
* CLOSE ANY LOG LEFT OPEN BY AN EARLIER RUN. CAPTURE IGNORES THE ERROR WHEN ;
* NO LOG IS OPEN ;
capture log close ;
clear all ;
log using 1_SingleWomen_ReadInCps.log, replace text;


***************************************************************************;
* READ IN CPS DATA FROM IPUMS FOR EITC EMPLOYMENT PROJECT ;
***************************************************************************;

* START TIMESTAMP - DISPLAYED AGAIN AT THE END OF THE SCRIPT NEXT TO THE ;
* FINISH TIME ;
local beginTime "DateTime: $S_DATE $S_TIME";
set more off ;
set linesize 200 ;
* NOTE(REVIEW): PRESUMABLY ONLY NEEDED ON OLDER STATA RELEASES WITH MANUAL ;
* MEMORY MANAGEMENT - CONFIRM BEFORE REMOVING ;
set mem 5g ;

* EVERYTHING BELOW READS AND WRITES RELATIVE TO THE WORKING DIRECTORY ;
cd "$dataDir" ;


	
***************************************************************************;
* DEFINE STUFF THAT WE DONT EVER WANT TO COMMENT OUT. ;
***************************************************************************;
* THE THREE LISTS BELOW ARE CONCATENATED LATER INTO THE LIST OF VARIABLES ;
* KEPT FROM THE EXTRACT ;
* IDENTIFIERS AND WEIGHTS ;
local technical_vars "year serial relate famunit pernum wtsupp spmwt hwtsupp";
* PERSON AND HOUSEHOLD CHARACTERISTICS ;
local demographic_vars "age marst race sex hispan educ
	statefip wkswork1 uhrswork wksunem1 whynwly mthwelfr schlcoll
	nfams nmothers momloc momrule poploc poprule foodstmp
	pubhous rentsub heatsub lunchsub offtotval stampval heatval union
	qincwage poverty offcutoff offpov spmpov spmthresh spmtotres spmeitc 
	spmfedtaxac spmfedtaxbc" ;
* INCOME COMPONENTS. TWO THINGS LATER CODE RELIES ON - ;
*   1. inctot STAYS FIRST. THE RECODE LOOP BLANKS inctot BEFORE IT TESTS ;
*      FOR A MISSING inctot ON THE OTHER COMPONENTS ;
*   2. EVERY NAME HAS A THREE CHARACTER PREFIX. THE CHILD AND FAMILY ;
*      VERSIONS ARE NAMED BY DROPPING THE FIRST THREE CHARACTERS ;
local income_vars "inctot incwage incbus incfarm incss incwelfr incgov 
	incaloth incretir incssi incdrt incint incunemp incwkcom incvet incsurv 
	incdisab incdivid incrent inceduc incchild incalim incasist incother  ";


***************************************************************************;
* READ IN IPUMS CPS DATA ;
***************************************************************************;

* FULL LIST OF VARIABLES RETAINED FROM THE EXTRACT ;
local keepList "`technical_vars' `demographic_vars' `income_vars'" ;
* STUB SHARED BY THE COMPRESSED DATA FILE AND THE DO FILE OF THE EXTRACT ;
local cps "cps_00017";
* REMOVE LEFTOVERS FROM AN EARLIER RUN. CAPTURE CARRIES ON IF NONE EXIST ;
capture erase `cps'.do ;
capture erase `cps'.dat ;
capture erase `cps'.dat.gz ;
* COPY THE COMPRESSED DATA INTO THE WORKING DIRECTORY AND UNPACK IT THERE. ;
* BOTH PATHS ARE QUOTED SO THAT FOLDER NAMES CONTAINING BLANKS STILL WORK ;
local from "$rawDir\\`cps'.dat.gz" ;
local to "$dataDir\\`cps'.dat.gz"  ;
copy "`from'" "`to'" , replace;
shell 7za e `cps'.dat.gz ;
* COPY AND RUN THE DO FILE SHIPPED WITH THE EXTRACT. PRESUMABLY IT LOADS ;
* THE UNPACKED DATA INTO MEMORY - CONFIRM AGAINST THAT FILE ;
local from "$rawDir\\`cps'.do" ;
local to "$dataDir\\`cps'.do"  ;
copy "`from'" "`to'" , replace;
quietly do `cps'.do ;
keep `keepList' ;
* SET MISSINGS TO ZERO - TOPCODING, MISSING VALUES ETC IS NOT AS CLEAR AS ;
* DESCRIBED IN THE DOCUMENTATION, ONE REALLY HAS TO LOOK AT THE DATA AND SEE ;
* STEP 1 - TURN SENTINEL CODES AND ANYTHING ABOVE ONE MILLION INTO MISSING. ;
* inctot IS PROCESSED FIRST, SO BY THE TIME THE OTHER COMPONENTS ARE ;
* REACHED THE LAST CONDITION BLANKS THEM TOO WHENEVER inctot IS MISSING ;
foreach var of varlist `income_vars' {;
	qui replace `var' = . if (`var' == -19998 | `var' == -9999 | 
		`var' == 9999 | `var' == 99997 | `var' == 99998 | `var' == 99999 | 
		`var' == 999997 | `var' == 9999997 | `var' == 999999 | 
		`var' == 99999997 | `var' >= 9999999 | `var' == 1099999 | `var'>1000000 |
		inctot == .);
	};	
* STEP 2 - ONLY NOW REPLACE MISSING WITH ZERO. DOING THIS INSIDE STEP 1 ;
* WOULD ZERO inctot BEFORE THE OTHER COMPONENTS COULD TEST IT ;
foreach var of varlist `income_vars' {;
	qui replace `var' = 0 if `var' == . ;
	};
summ `income_vars'  ;
save WorkingFullCPSTEMP, replace ;
* THE RAW EXTRACT FILES ARE NO LONGER NEEDED IN THE WORKING DIRECTORY ;
capture erase `cps'.do ;
capture erase `cps'.dat ;
capture erase `cps'.dat.gz ;
clear ;





***************************************************************************;
* BUILDING CHILDREN ;
***************************************************************************;
* VARIABLES CARRIED ALONG FOR EVERY GENERATION IN THE CHAIN ;
local generationvars "age sex schlcoll marst whynwly wtsupp `income_vars'" ; 
* YOUNGEST GENERATION CHILD FILE ;
* EVERY PERSON STARTS OUT AS GENERATION 0 (SUFFIX G0). pernum IS THEN RESET ;
* TO POINT AT THE PARENT - momloc WHEN IT IS NONZERO, OTHERWISE poploc ;
use WorkingFullCPSTEMP ;
keep year serial pernum momloc poploc `generationvars' ;
foreach var of varlist pernum momloc poploc `generationvars'  {;
	rename `var' `var'G0 ;
	};
gen pernum = momlocG0 ;
replace pernum = poplocG0 if momlocG0==0 & poplocG0>0 ;
* A ZERO POINTER MEANS NEITHER PARENT POINTER WAS SET, SO THE PERSON IS ;
* NOT A CHILD OF ANYONE IN THE FILE ;
drop if pernum == 0 ;
save childrens, replace ;
* EACH PASS ATTACHES THE NEXT OLDER GENERATION (SUFFIX G1, G2, ...) TO THE ;
* CHAIN AND THEN MOVES pernum UP TO THAT GENERATIONS OWN PARENT ;
foreach gen in 1 2 3 4 5 {;
	* LOAD WITH THE CLEAR OPTION INSTEAD OF CLEARING AT THE END OF THE PASS. ;
	* THAT WAY THE CHAIN IS STILL IN MEMORY AFTER THE LOOP EVEN WHEN ALL ;
	* FIVE PASSES RUN WITHOUT REACHING THE BREAK BELOW ;
	use WorkingFullCPSTEMP, clear ;
	keep year serial pernum momloc poploc `generationvars' ;
	foreach var of varlist momloc poploc `generationvars' {;
		rename `var' `var'G`gen' ;
		};
	sort year serial pernum ;
	* ONE PARENT RECORD CAN MATCH SEVERAL CHAINS SAVED IN THE PREVIOUS PASS ;
	merge 1:m year serial pernum using childrens ;
	* DROP PEOPLE NOBODY POINTS AT. CHAINS WHOSE POINTER FOUND NO MATCH ARE ;
	* KEPT, WITH THIS GENERATIONS VARIABLES LEFT MISSING ;
	drop if _merge == 1 ;
	drop _merge ;
	rm childrens.dta ;
	rename pernum pernumG`gen' ;
	gen pernum = momlocG`gen' ;
	replace pernum = poplocG`gen' if momlocG`gen'==0 ;
	count if pernum >0 & pernum!=. ;
	* BREAK LOOP IF WE CANNOT FIND ANY NEW PARENTS ;
	if r(N)==0 {;
		continue, break ;
		};
	sort year serial pernum ;
	save childrens, replace ; 
	};
* THE SCRATCH FILE ONLY SURVIVES WHEN THE LOOP RAN TO THE END WITHOUT A BREAK ;
capture rm childrens.dta ;

* PICK THE NEAREST ANCESTOR REACHED THROUGH A MOTHER POINTER WHO IS FEMALE ;
* AND AGED 24 TO 48. CLOSER GENERATIONS WIN BECAUSE LATER PASSES ONLY FILL ;
* ROWS THAT ARE STILL UNASSIGNED ;
gen generation = . ;
forvalues g = 1/3 {;
	local below = `g' - 1 ;
	replace generation = `g' if generation == .
		& ageG`g' != . & momlocG`below' != . & momlocG`below' > 0
		& sexG`g' == 2 & inrange(ageG`g', 24, 48) ;
	};
tab generation ;
* PERNUM WILL BE HOW WE MERGE CHILD TO MOM ;
drop pernum ;
gen pernum = . ;
forvalues g = 1/3 {;
	local below = `g' - 1 ;
	replace pernum = momlocG`below' if generation == `g' ;
	};
* CHILDREN WITHOUT AN ELIGIBLE WOMAN IN THE CHAIN ARE NOT NEEDED ;
keep if pernum != . ;
* FINALLY THE CHILD DEMOS WE WANT TO BRING ALONG ;
foreach v of local generationvars {;
	rename `v'G0 `v' ;
	};
keep year serial pernum `generationvars' ;
* QUALIFYING CHILD - AGE 18 OR LESS, OR OLDER THAN 18 BUT UNDER 24 WITH ;
* schlcoll EQUAL TO 1 OR 3. LEFT MISSING (NOT ZERO) FOR EVERYONE ELSE ;
gen qualChild = 1 if age <= 18 | (age > 18 & age < 24 & inlist(schlcoll, 1, 3)) ;
* NUMBER OF CHILDREN BY AGE ;
* THE TWO LISTS ARE READ IN PARALLEL - BIN b RUNS FROM WORD b OF binLo TO ;
* WORD b OF binHi, BOTH ENDS INCLUDED ;
local binLo "0 0 6 15" ;
local binHi "1 5 14 24" ;
forvalues b = 1/4 {;
	local lo : word `b' of `binLo' ;
	local hi : word `b' of `binHi' ;
	gen childAgeBin_`lo'_`hi' = (inrange(age, `lo', `hi') & qualChild == 1) ;
	};
summ childAgeBin_* ;
* WE NEED THE WEIGHTS FROM CHILDREN TO CONSTRUCT INDIVIDUAL ESTIMATES OF POVERTY ;
rename wtsupp chldwt ;
* INCOME VARIABLES FOR CHILDREN. SUMMATION ACROSS K ;
* EVERY COMPONENT EXCEPT inctot IS ADDED INTO chldtot AND RENAMED BY ;
* SWAPPING ITS FIRST THREE CHARACTERS FOR THE PREFIX chld ;
gen chldtot = 0 ;
foreach v of varlist `income_vars' {;
	if "`v'" != "inctot" {;
		local stub = substr("`v'", 4, .) ;
		qui replace chldtot = chldtot + `v' ;
		rename `v' chld`stub' ;
		local chldinc_vars "`chldinc_vars' chld`stub'" ;
		};
	};
* THE REPORTED TOTAL IS REPLACED BY THE SUM OF COMPONENTS BUILT ABOVE ;
drop inctot ;
local chldinc_vars "`chldinc_vars' chldtot" ;
summ `chldinc_vars' ;
* ONE ROW PER WOMAN - ADD UP HER CHILDREN ;
sort year serial pernum ;
collapse (sum) qualChild childAgeBin_* chldwt `chldinc_vars', by(year serial pernum) ;
* SAME ONE YEAR SHIFT THAT IS APPLIED TO THE WOMEN BEFORE THE TWO FILES ARE ;
* MERGED ;
replace year = year - 1 ;
* THE AGE BINS ARE INDICATORS, NOT COUNTS, SO CAP THEM AT ONE ;
foreach v of varlist childAgeBin_* {;
	replace `v' = min(`v', 1) ;
	};
sort year serial pernum ;
save childrensFAMILY, replace ;
clear ;

***************************************************************************;
* NOW LETS CONSTRUCT OUR SAMPLE OF WOMEN. ;
***************************************************************************;

use WorkingFullCPSTEMP ;
keep `technical_vars' `demographic_vars' `income_vars' ;

* ONE SORT SERVES BOTH THE HOUSEHOLD AND THE FAMILY GROUPINGS BELOW ;
sort year serial famunit ;

* HOUSEHOLD SIZE - TOTAL ;
by year serial : gen byte hhsize = _N ;
summ hhsize ;

* FAMILY SIZE ;
by year serial famunit : gen byte famsize = _N ;
summ famsize ;
tab hhsize famsize if hhsize<=10 & famsize<=10 ;

* FAMILY KIDS - SAME AGE AND SCHOOLING RULE AS THE QUALIFYING CHILD FLAG, ;
* COUNTED WITHIN THE FAMILY UNIT ;
by year serial famunit : egen byte famkids = 
	total(age <= 18 | (age > 18 & age < 24 & inlist(schlcoll, 1, 3))) ;
summ famkids ;

* HOUSEHOLD COUNT OF MEMBERS HOLDING EACH OF THESE relate CODES ;
foreach r in 201 501 701 1001 1113 1114 1115 1241 1260 {;
	by year serial : egen relate_`r' = total(relate == `r') ;
	} ;

* FAMILY INCOME - SUM EACH PERSONS COMPONENTS (inctot LEFT OUT), THEN ADD ;
* UP ACROSS THE FAMILY UNIT ;
tempvar ownInc ;
gen `ownInc' = 0 ;
foreach v of varlist `income_vars' {;
	if "`v'" != "inctot" {;
		qui replace `ownInc' = `ownInc' + `v' ;
		};
	};
by year serial famunit : egen faminc = total(`ownInc') ;
drop `ownInc' ;
*summ famunit `faminc_vars' ;

* EMPLOYED - NOTE THAT THIS IS INDIVIDUAL SPECIFIC. ;
gen byte employed = (incwage > 0 & incwage != .) ;
*tab employed ;

* SEX ;
gen byte female = (sex == 2) ;
*tab year female ;

* CREATE UNIFORM EDUCATION BINS ;
* THE BINS KEY ON educ WITH ITS LAST DIGIT DROPPED. CODES OF 900 AND ABOVE ;
* AND MISSING educ LEAVE hgc MISSING ;
local tens "floor(educ/10)" ;
gen byte hgc = . ;
replace hgc = 11 if `tens' < 7 ;
replace hgc = 12 if `tens' == 7 ;
replace hgc = 13 if inrange(`tens', 8, 10) ;
replace hgc = 16 if `tens' == 11 ;
replace hgc = 17 if inrange(`tens', 12, 89) ;
*tab year hgc , missing;
* TAX YEAR ;
replace year = year - 1 ;


* ------------- RESTRICTIONS --------------- ;
* EACH STEP IS FOLLOWED BY A COUNT SO THE LOG SHOWS HOW THE SAMPLE SHRINKS ;
* KEEP IF FEMALE ;
drop if female != 1 ;
count ;
* KEEP IF UNMARRIED ;
drop if inlist(marst, 1, 2) ;
count ;
* AGE RESTRICTION - NEED THIS TO BE HIGHER THAN QUAL AGE;
keep if inrange(age, 24, 48) ;
count ;
* DROP IF THE REASON DIDNT WORK LAST YEAR WAS SICKNESS, DISABILITY, 
*	GOING TO SCHOOL ;
drop if inlist(whynwly, 2, 4) ;
count ;
* ------------- RESTRICTIONS --------------- ;



* FAMILY SPECIFIC CHILDREN ;
* ATTACH THE PER WOMAN CHILD TOTALS. ROWS THAT EXIST ONLY IN THE CHILD FILE ;
* BELONG TO WOMEN OUTSIDE THE SAMPLE AND ARE DROPPED ;
sort year serial pernum ;
merge 1:1 year serial pernum using childrensFAMILY ;
drop if _merge ==2 ;
drop _merge ;
* WOMEN WITH NO ROW IN THE CHILD FILE GET ZERO CHILDREN. THE CHILD WEIGHT ;
* AND CHILD INCOME VARIABLES ARE LEFT MISSING FOR THEM ;
foreach var of varlist qualChild childAgeBin_* {;
	replace `var' = 0 if `var' == . ;
	};
summ qualChild childAgeBin_* ;



capture rm childrensFAMILY.dta ;
capture rm childrensHOUSEHOLD.dta ;

* FAMILY INCOME - OWN COMPONENT PLUS THE CHILDRENS MATCHING COMPONENT. ;
* ROWTOTAL TREATS A MISSING CHILD VALUE AS ZERO ;
foreach var of varlist `income_vars' {;
	local name = substr("`var'",4,.) ;
	egen fam`name' = rowtotal(`var' chld`name') ;
	local faminc_vars "`faminc_vars' fam`name'";
	local name ;
	};
summ `faminc_vars' ;


***************************************************************************;
* RUN THROUGH TAXSIM ;
***************************************************************************;
* STATE ;
* statefip SKIPS 3, 7, 14, 43 AND 52. SUBTRACTING THE NUMBER OF SKIPPED ;
* CODES BELOW EACH RANGE GIVES A GAP FREE NUMBERING. ANYTHING OUTSIDE THE ;
* LISTED RANGES STAYS MISSING ;
gen byte state = . ;
replace state = statefip     if statefip <= 2 ;
replace state = statefip - 1 if inrange(statefip, 4, 6) ;
replace state = statefip - 2 if inrange(statefip, 8, 13) ;
replace state = statefip - 3 if inrange(statefip, 15, 42) ;
replace state = statefip - 4 if inrange(statefip, 44, 51) ;
replace state = statefip - 5 if inrange(statefip, 53, 56) ;
*tab state, missing ;

* INPUTS FOR THE TAX CALCULATOR. NOTE(REVIEW): THE NAMES PRESUMABLY FOLLOW ;
* WHAT THE USER WRITTEN TAXSIM WRAPPER EXPECTS - CONFIRM AGAINST IT ;
* DEPENDENTS ARE THE QUALIFYING CHILDREN, CAPPED AT FIVE ;
gen byte depx = min(qualChild, 5) ;
gen byte depchild = depx ;
gen byte mstat = 1 ;
* NET EARNINGS GO TO pwages WHEN POSITIVE. A NET LOSS IS ROUTED TO ;
* otherprop BELOW INSTEAD ;
gen pwages = max(incwage + incbus + incfarm, 0) ;
gen byte swages = 0 ;
gen byte dividends = 0 ;
gen byte agex = 0 ;
gen gssi = incss + incssi + incsurv + incdisab ;
gen pensions = incretir ;
gen ui = incunemp ;
gen otherprop = min(incwage + incbus + incfarm, 0) +
	incaloth + incalim + incother + incdrt + incint + incdivid + incrent ;
gen transfers = incwelfr + incgov + incwkcom + incvet + inceduc + incchild + incasist ;

* SNAPSHOT THAT BOTH TAX RUNS START FROM AND THAT THE RESULTS ARE MERGED ;
* BACK ONTO ;
sort year serial ;
save cpsBeforeTaxsim, replace ;

* CURRENT LAW RUN - TAXES UNDER THE RULES OF THE TAX YEAR ITSELF ;
* ONLY THE IDENTIFIERS AND THE TAX CALCULATOR INPUTS ARE PASSED ON ;
keep year serial pernum state agex depx depchild mstat 
	pwages swages dividends gssi pensions ui otherprop transfers;
* CLEAR SCRATCH FILES BEFORE AND AFTER THE CALL. NOTE(REVIEW): PRESUMABLY ;
* THESE ARE WRITTEN BY THE WRAPPER BELOW - CONFIRM ;
capture rm msg.txt;
capture rm taxsim_out.dta;
* USER WRITTEN COMMAND, NOT DEFINED IN THIS FILE. THE CODE BELOW ASSUMES IT ;
* LEAVES fiitax fica siitax AND THE NUMBERED v VARIABLES IN MEMORY ;
ankurs_taxsim, full replace;
capture rm msg.txt;
capture rm taxsim_out.dta;
keep year serial pernum fiitax fica siitax v25 v39 v22 v23 v19 ;
* GIVE THE NUMBERED OUTPUTS READABLE NAMES ;
rename v25 fedEIC;
rename v39 stEIC ;  
rename v22 ctc ;
rename v23 actc ;
rename v19 fedRegTax ;
* SORTED ON THE KEY USED WHEN THE RESULTS ARE MERGED BACK ;
sort year serial pernum ;
save CYtaxsim, replace ;
clear ;

* PRIOR LAW RUN - THE SAME INCOMES TAXED UNDER THE RULES OF THE YEAR BEFORE ;
use cpsBeforeTaxsim ;
keep year serial pernum state agex depx depchild mstat 
	pwages swages dividends gssi pensions ui otherprop transfers;
* PRICE LEVEL OF THE TAX YEAR. THE EXPLICIT MANY TO ONE MERGE MATCHES THE ;
* MERGE STYLE USED ELSEWHERE IN THIS FILE AND STOPS WITH AN ERROR IF THE ;
* CPI FILE EVER HOLDS MORE THAN ONE ROW PER YEAR ;
sort year;
merge m:1 year using cpi;
keep if _merge == 3 ;
drop _merge ;
rename cpi currentYearCpi ;
* STEP BACK ONE YEAR AND PICK UP THAT YEARS PRICE LEVEL AS WELL ;
replace year = year - 1 ;
sort year;
merge m:1 year using cpi;
keep if _merge == 3 ;
drop _merge ;
rename cpi priorYearCpi ;
* EXPRESS THE DOLLAR INPUTS IN PRIOR YEAR PRICES BEFORE APPLYING PRIOR LAW ;
foreach dollar of varlist pwages swages gssi 
		otherprop pensions transfers ui {;
	replace `dollar' = `dollar' * (priorYearCpi/currentYearCpi);
	};
capture rm msg.txt;
capture rm taxsim_out.dta;
* USER WRITTEN COMMAND, NOT DEFINED IN THIS FILE. THE KEEP BELOW ASSUMES IT ;
* LEAVES THE TWO CPI VARIABLES IN MEMORY - NOTE(REVIEW): CONFIRM ;
ankurs_taxsim, full replace;
capture rm msg.txt; 
capture rm taxsim_out.dta;
keep year serial pernum v25 v39 v22 v23 v19 priorYearCpi currentYearCpi ;
rename v25 fedEIC_ly;
rename v39 stEIC_ly;  
rename v22 ctc_ly;
rename v23 actc_ly;
rename v19 fedRegTax_ly ;
* BRING THE RESULTS BACK TO TAX YEAR PRICES ;
foreach dollar of varlist fedEIC_ly stEIC_ly ctc_ly actc_ly fedRegTax_ly {;
	replace `dollar' = `dollar' * (currentYearCpi/priorYearCpi);
	};
drop priorYearCpi currentYearCpi ;
* UNDO THE ONE YEAR SHIFT SO THE KEY LINES UP WITH THE SNAPSHOT AGAIN ;
replace year = year + 1 ;
sort year serial pernum;
save LYtaxsim, replace ;
clear ;

* PUT BOTH SETS OF TAX RESULTS BACK ONTO THE SNAPSHOT, ONE ROW PER PERSON ;
use cpsBeforeTaxsim ;
foreach piece in CYtaxsim LYtaxsim {;
	merge 1:1 year serial pernum using `piece', nogenerate ;
	};
* THE TAX CALCULATOR INPUTS HAVE SERVED THEIR PURPOSE ;
drop depx depchild mstat pwages swages dividends gssi pensions ui 
	otherprop transfers ;
*rm cpsBeforeTaxsim.dta ;
foreach piece in CYtaxsim LYtaxsim {;
	erase `piece'.dta ;
	};

* EVERY DOLLAR VALUED TAX OUTPUT - USED BY THE INFLATION ADJUSTMENT ;
local taxsim_output "fiitax fica siitax fedEIC stEIC ctc actc fedRegTax 
	fedEIC_ly stEIC_ly ctc_ly actc_ly fedRegTax_ly" ;	



***************************************************************************;
* INFLATION  ;
***************************************************************************;
* PRICE LEVEL THAT ALL DOLLAR AMOUNTS ARE CONVERTED TO. NOTE(REVIEW): THE ;
* VALUE LOOKS LIKE THE 2013 ANNUAL CPI-U - CONFIRM AGAINST THE CPI FILE ;
local baseCpi 232.957 ;
* THE EXPLICIT MANY TO ONE MERGE MATCHES THE MERGE STYLE USED ELSEWHERE IN ;
* THIS FILE AND STOPS WITH AN ERROR IF THE CPI FILE EVER HOLDS MORE THAN ;
* ONE ROW PER YEAR ;
sort year;
merge m:1 year using cpi;
tab _merge;
keep if _merge == 3 ;
drop _merge ;
foreach dollar of varlist `income_vars' `taxsim_output' 
		stampval heatval `faminc_vars' `chldinc_vars' faminc {;
	qui replace `dollar' = `dollar' * (`baseCpi'/cpi); 
	};

* THE TWO BENEFIT VALUES ARE SCALED BY THE SHARE OF HOUSEHOLD MEMBERS THAT ;
* ARE THE WOMAN HERSELF PLUS HER QUALIFYING CHILDREN ;
replace heatval = 0 if heatval==.;
replace heatval = heatval*(1+qualChild)/(hhsize) ;
replace stampval = 0 if stampval==. ;
replace stampval = stampval*(1+qualChild)/(hhsize) ;

* TOTAL INCOME PLUS THE TWO BENEFIT VALUES LESS FEDERAL TAX, STATE TAX ;
* AND FICA ;
gen atti = inctot+stampval+heatval-fiitax-siitax-fica ;

summ ;
save cpsAfterTaxsim, replace ;



* NOTE(REVIEW): THE NEXT TWO LINES LOOK LIKE RESIDUE OF A REMOVED BLOCK ;
* COMMENT. THEY START WITH A STAR AND END AT THE SEMICOLON - TODO CONFIRM ;
* THEY CAN BE DELETED ;
*/
*;
* SHOW START AND FINISH TIME SIDE BY SIDE, THEN CLOSE THE LOG ;
disp "`beginTime'" ;
disp "DateTime: $S_DATE $S_TIME";
log close ;

